**06/28/2021
**file to bring in CPD FOIA trr files received in 2021 & assign unique IDs
 *I combine TRR data that are received in two batches. First data runs from 2004-2016. The second runs after 2016. The data for the first batch is disaggregated across different files. I first bring together those file (main use of force info, detail action-response file, subject, and officer information), and attach the officer information (names, birth year, and appointed dates). The second batch is less disaggregated and is contained in only file.

**Housekeepig
local ver_suff _9_21
clear all
local ver_suff _9_21
clear all
if "`c(username)'"=="taehokim" |"`c(username)'"=="Taeho Kim"{ 
	cd "/Users/`c(username)'/Dropbox (Penn Law)/misconduct/do_files`ver_suff'"
} 
else{
	cd "/Users/`c(username)'/Dropbox/misconduct/do_files`ver_suff'"
}
set more off

//later pass through profiles dataset to fuzzy match 
local profiles_match `1'

*******************
*BRING IN TRR DATA -- old data
*******************
 
insheet using ../fully-unified-data/TRR/TRR-main_2004-2016_2016-09.csv

**Record year and month of the data
gen year = substr(trr_date, 1, 4)
gen month = substr(trr_date, 6, 2)
destring year month, replace
gen trr_yearmo = ym(year, month)
format %tm trr_yearmo 

tempfile trr_main
save `trr_main'  //one row per trr_id. File for general circumstances


//bring in TRR force data

clear  
 
insheet using ../fully-unified-data/TRR/TRR-actions-responses_2004-2016_2016-09.csv
//make one row summary for each TRR: collapse member + subject actions

sort trr_id person, stable
by trr_id person: gen action_all = action[1]
by trr_id person: replace action_all = action_all[_n-1] + "," + action if _n > 1
by trr_id person: replace action_all = action_all[_N]

gen mem_action_all = action_all if person == "Member Action"
gen sub_action_all = action_all if person == "Subject Action"

gsort trr_id -sub_action_all
replace sub_action_all = sub_action_all[_n-1] if trr_id == trr_id[_n-1] & sub_action_all == ""
gsort trr_id -mem_action_all
replace mem_action_all = mem_action_all[_n-1] if trr_id == trr_id[_n-1] & mem_action_all == ""

by trr_id: keep if _n == 1

keep trr_id mem_action_all sub_action_all

rename mem_action_all memact
rename sub_action_all subact

tempfile trr_force
save `trr_force'

//bring in TRR subjects data
 
clear 
 
insheet using ../fully-unified-data/TRR/TRR-subjects_2004-2016_2016-09.csv

**Rename variables
rename gender s_sex
rename race subjectrace
rename birth_year s_birthyear
rename armed subject_armed
rename injured subject_injured
rename alleged_injury subject_alleged_injury

tempfile trr_subjects
save `trr_subjects'


//bring in TRR officer data

clear  
 
insheet using ../fully-unified-data/TRR/TRR-officers_2004-2016_2016-09.csv

**Rename variables
rename injured member_injured
rename unit member_assigned_unit
rename unit_detail member_detail_unit
rename rank member_position
rename in_uniform member_in_uniform
 
tempfile trr_officers
save `trr_officers'
 
**Combine datasets
use `trr_main', clear
merge 1:1 trr_id using `trr_force'
drop _merge

merge 1:1 trr_id using `trr_subjects'
drop _merge

merge 1:1 trr_id using `trr_officers'
drop _merge

drop sr_no se_no 
rename block street_no
rename direction dir
rename street occurance_street_name

**Create exact date/time of TRR
gen datetime = trr_date + " " +trr_time 
generate double numtime = clock(datetime, "YMDhms")
format numtime %tc
drop datetime
rename numtime datetime

rename indoor_or_outdoor indoor_outdoor 

drop location
drop subject_id subject_no row_id trrofficer

//merge with old profiles

preserve

clear 
insheet using ../fully-unified-data/profiles/final-profiles.csv
tempfile profile
save `profile'

restore 
merge m:1 uid using `profile'
drop if _merge == 2
drop _merge

drop old_uid link_uid race gender resignation_date current_status current_star current_unit current_rank start_date org_hire_date profile_count cleaned_rank

rename first_name mem_first_name

rename last_name mem_last_name
replace mem_last_name = mem_last_name + " " + suffix_name
rename middle_initial mem_mi

rename birth_year m_birthyear
 
drop uid

gen double appointed_dt = date(appointed_date, "YMD")
format %td appointed_dt
drop appointed_date
rename appointed_dt appointed_date 

*******************
*BRING IN TRR DATA -- new data post 2016
*******************

preserve
clear 
import excel using ../FOIA/P506061_TRRData_Redacted_post2016.xlsx, ///
         sheet(`Data') firstrow clear case(lower)
rename trr_report_id trr_id


tempfile new_trr
save `new_trr'		 
restore 

append using `new_trr'

drop if trr_id == . 
drop age year month trr_yearmo trr_date trr_time notify_oemc notify_district_sergeant notify_op_command notify_det_division number_of_weapons_discharged party_fired_first location_recode taser total_number_of_shots firearm_used number_of_officers_using_firearm

drop location m_sex member_race subject_weap weapon_descr statute stat_descr 




*******************
*PROCESS TRR DATA FOR MATCHING
*******************


**Record names
gen fname = trim(mem_first_name)
gen lname = trim(mem_last_name)
gen mi = trim(mem_mi)

**Record appointment date
gen appoint_yr = year(appointed_date)
gen appoint_m = month(appointed_date)
gen appoint_d = day(appointed_date)		   

**Record birthyear
rename m_birthyear birthyear 

//some obs have variations of last names: deflate last names
gen lname_mod = subinstr(lname, "-", "", .)
replace lname_mod = subinstr(lname_mod, " ", "", .)
replace lname_mod = subinstr(lname_mod, ".", "", .)

**Save intermediate dataset	   
save ../processed_data`ver_suff'/FOIA_processing/trr, replace
   
**Deduplicate with respect to matchingv ariables
duplicates drop fname lname_mod mi appoint_yr appoint_m appoint_d birthyear , force
keep fname lname lname_mod mi appoint_yr appoint_m appoint_d birthyear 
sort fname lname_mod mi appoint_yr appoint_m appoint_d birthyear 

**Drop if missing name information
drop if lname == ""

**Generate preliminary ID
gen pid = _n 

**Generate name soundexes
gen fname_sdx = soundex(fname)
gen lname_sdx = soundex(lname)
 
**Save data with preliminary IDs
save ../processed_data`ver_suff'/FOIA_processing/trr_pid, replace
clear


*******************
*SET UP LIST OF DATA TO STORE MATCHED IDS
*******************
clear 

set obs 1
gen u_pid = -1
gen pid = -1

save ../processed_data`ver_suff'/FOIA_processing/pid_appending, replace
clear


*******************
*FUZZY MATCHING
*******************

use ../processed_data`ver_suff'/FOIA_processing/trr_pid, clear 

**Perfect name, appoint date, birth year match
do ../do_files`ver_suff'/fuzzy_merge_pid_Kim 2 2 1 2 2 2 2 `profiles_match' u_pid ../processed_data`ver_suff'/FOIA_processing/pid_appending 1 //10,029 so far

**Perfect name, appoint date match, 1 year birth year gap
do ../do_files`ver_suff'/fuzzy_merge_pid_Kim 2 2 1 2 2 2 1 `profiles_match' u_pid ../processed_data`ver_suff'/FOIA_processing/pid_appending 2  

**Perfect name, appoint date, and birth year match, no middle initial
do ../do_files`ver_suff'/fuzzy_merge_pid_Kim 2 2 0 2 2 2 2 `profiles_match' u_pid ../processed_data`ver_suff'/FOIA_processing/pid_appending 3  

**Perfect first name, soundex last name, appoint date, and birth year match, no middle initial
do ../do_files`ver_suff'/fuzzy_merge_pid_Kim 2 1 0 2 2 2 2 `profiles_match' u_pid ../processed_data`ver_suff'/FOIA_processing/pid_appending 4  

**Soundex both names, middle initial, and appoint date, birth year match
do ../do_files`ver_suff'/fuzzy_merge_pid_Kim 1 1 1 2 2 2 2 `profiles_match' u_pid ../processed_data`ver_suff'/FOIA_processing/pid_appending 5  

**Soundex both names, no middle initial, and appoint date, birth year match
do ../do_files`ver_suff'/fuzzy_merge_pid_Kim 1 1 0 2 2 2 2 `profiles_match' u_pid ../processed_data`ver_suff'/FOIA_processing/pid_appending 6  

**first name, no middle initial, no last name and appoint date, birth year match
do ../do_files`ver_suff'/fuzzy_merge_pid_Kim 2 0 0 2 2 2 2 `profiles_match' u_pid ../processed_data`ver_suff'/FOIA_processing/pid_appending 7  

 
**first name, last name, off in appoint_yr 
do ../do_files`ver_suff'/fuzzy_merge_pid_Kim 2 2 0 1 2 2 2 `profiles_match' u_pid ../processed_data`ver_suff'/FOIA_processing/pid_appending 9  


**first name, last name, off in appoint_mo 
do ../do_files`ver_suff'/fuzzy_merge_pid_Kim 2 2 0 2 1 2 2 `profiles_match' u_pid ../processed_data`ver_suff'/FOIA_processing/pid_appending 10  


**first name, last name, off in appoint_day 
do ../do_files`ver_suff'/fuzzy_merge_pid_Kim 2 2 0 2 2 1 2 `profiles_match' u_pid ../processed_data`ver_suff'/FOIA_processing/pid_appending 11  


**first name, last name, off in birth year 
do ../do_files`ver_suff'/fuzzy_merge_pid_Kim 2 2 0 2 2 2 1 `profiles_match' u_pid ../processed_data`ver_suff'/FOIA_processing/pid_appending 12  


**first name, last name, no other match 
do ../do_files`ver_suff'/fuzzy_merge_pid_Kim 2 2 0 3 3 3 3 `profiles_match' u_pid ../processed_data`ver_suff'/FOIA_processing/pid_appending 13  

** first name, soundex last name, no other match 
do ../do_files`ver_suff'/fuzzy_merge_pid_Kim 2 1 0 3 3 3 3 `profiles_match' u_pid ../processed_data`ver_suff'/FOIA_processing/pid_appending 14  
  
 
*******************
*KEEP BEST MATCHES
******************* 

use ../processed_data`ver_suff'/FOIA_processing/pid_appending.dta, clear

//drop the less good matches if individuals in awards data are matched with more than one in profiles

sort pid match_no u_pid
quietly by pid:  gen dup = cond(_N==1,0,_n)
sort dup
tab dup

drop if dup>1 
drop dup 
sort pid match_no

tempfile best_matches
save `best_matches'

*******************
*MERGE DE-DUPLICATED AWARDS DATA WITH MATCHINGS
******************* 

use ../processed_data`ver_suff'/FOIA_processing/trr_pid, clear

merge 1:1 pid using `best_matches', keep(1 3)  


 

